import constdef
import requests
import xlwt
from lxml import etree


def new_excel(sheet_name: str):
    """Create a UTF-8 encoded xlwt workbook containing a single sheet.

    Args:
        sheet_name: Name given to the sheet that is added to the workbook.

    Returns:
        A ``(workbook, worksheet)`` tuple; the worksheet is the sheet that
        was just added to the workbook.
    """
    book = xlwt.Workbook(encoding='utf-8')
    return book, book.add_sheet(sheet_name)


def get_response_text(url: str, timeout: float = 10):
    """Download ``url`` with an HTTP GET and return the response body as text.

    Args:
        url: Address to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely on a stalled server.

    Returns:
        The decoded response body, or ``None`` when the request failed
        (connection error, timeout, or an HTTP error status). A failure is
        reported on stdout.
    """
    try:
        resp = requests.get(url=url, timeout=timeout)
        # Turn 4xx/5xx answers into an exception so that an error page is
        # never handed to the parsers as if it were real content.
        resp.raise_for_status()
    except requests.RequestException as exc:
        print("crawl url error...", url, exc)
        return None
    return resp.text


def parse_text(text: str):
    """Collect the link targets found on the listing page.

    Args:
        text: HTML source of the listing page. ``None`` or an empty string
            (e.g. after a failed download) is accepted.

    Returns:
        A list with the ``href`` value of every ``<a>`` located below an
        element whose ``class`` attribute is exactly ``lh250``; an empty
        list when there is nothing to parse.
    """
    if not text:
        return []

    html = etree.HTML(text)
    # NOTE(review): etree.HTML appears to return None for input that
    # contains no elements at all - guard rather than crash on .xpath().
    if html is None:
        return []

    return html.xpath('//*[@class="lh250"]//a/@href')


def _nth_stripped(nodes, index):
    """Return ``nodes[index]`` without surrounding whitespace, or "" if absent."""
    if not nodes or len(nodes) <= index:
        return ""
    return nodes[index].strip()


def parse_page(text: str):
    """Extract one person's details from a profile page.

    Args:
        text: HTML source of the profile page. ``None`` or an empty string
            (e.g. after a failed download) is accepted.

    Returns:
        A ``(name, telephone, email, lab, target)`` tuple of strings. Any
        field that cannot be located is returned as an empty string.

        * name: first text node of the 1st ``<p>`` inside the
          ``PeopleDetail`` div, with all whitespace removed.
        * telephone: first text node of the 3rd ``<p>``, stripped.
        * email: ``href`` of the link inside the 5th ``<p>``, stripped but
          otherwise returned as-is.
        * lab: second text node of the 6th ``<p>``, stripped.
        * target: all text below the ``con_one_1`` div, concatenated with
          all whitespace removed.
    """
    empty = ("", "", "", "", "")
    if not text:
        return empty

    html = etree.HTML(text)
    # NOTE(review): etree.HTML appears to return None for input that
    # contains no elements at all - guard rather than crash on .xpath().
    if html is None:
        return empty

    detail = '//div[@class="PeopleDetail"]'

    # str.split() with no argument drops every run of whitespace, so joining
    # the pieces removes inner spaces and line breaks as well.
    name = "".join(_nth_stripped(html.xpath(detail + "//p[1]//text()"), 0).split())
    telephone = _nth_stripped(html.xpath(detail + "//p[3]//text()"), 0)
    email = _nth_stripped(html.xpath(detail + "//p[5]/a/@href"), 0)
    # The lab value is the second text node of the paragraph, not the first.
    lab = _nth_stripped(html.xpath(detail + "//p[6]//text()"), 1)
    target = "".join("".join(html.xpath('//div[@id="con_one_1"]//text()')).split())

    return name, telephone, email, lab, target


def save_excel(worksheet, i, name, telephone, email, lab, target):
    """Write one person's record into row ``i`` of ``worksheet``.

    The fields are stored in columns 0 to 4 in the order name, telephone,
    email, lab, target.
    """
    record = (name, telephone, email, lab, target)
    for column, value in enumerate(record):
        worksheet.write(i, column, label=value)


def main():
    """Crawl the listing page and every linked profile into ``SJTU.xls``.

    The listing page at ``constdef.MAIN_PAGE`` supplies the profile links;
    each link is appended to ``constdef.URL``, fetched, parsed and written
    as one row of the "SJTU" sheet. Profiles that cannot be downloaded are
    skipped so that a single failure does not discard the whole run.
    """
    text = get_response_text(constdef.MAIN_PAGE)
    if text is None:
        # Nothing to crawl; get_response_text signals failure with None.
        return

    urls = parse_text(text)
    workbook, worksheet = new_excel("SJTU")

    row = 0  # next free row; only advanced for pages that were fetched
    for url in urls:
        page = get_response_text(url=constdef.URL + url)
        if page is None:
            continue
        name, telephone, email, lab, target = parse_page(page)
        save_excel(worksheet, row, name, telephone, email, lab, target)
        row += 1

    workbook.save("SJTU.xls")


# Run the crawl only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
